{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "D2H6YLCPOXvZ",
        "outputId": "ef400751-ab62-4ced-e961-e690bd2e98fd"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting openai\n",
            "  Downloading openai-1.35.13-py3-none-any.whl (328 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m328.5/328.5 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai) (3.7.1)\n",
            "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai) (1.7.0)\n",
            "Collecting httpx<1,>=0.23.0 (from openai)\n",
            "  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai) (2.8.0)\n",
            "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai) (1.3.1)\n",
            "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai) (4.66.4)\n",
            "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from openai) (4.12.2)\n",
            "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (3.7)\n",
            "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (1.2.1)\n",
            "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai) (2024.6.2)\n",
            "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)\n",
            "  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)\n",
            "  Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n",
            "Requirement already satisfied: pydantic-core==2.20.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai) (2.20.0)\n",
            "Installing collected packages: h11, httpcore, httpx, openai\n",
            "Successfully installed h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 openai-1.35.13\n"
          ]
        }
      ],
      "source": [
        "# %pip installs into the environment of the running kernel, which `!pip` does not guarantee.\n",
        "# The saved output of this cell shows openai 1.35.13 being installed.\n",
        "%pip install openai"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Fetch the dataset CSV from Google Drive by file id; the saved output shows it\n",
        "# landing at /content/German_French_UK_China_MENA_040420.csv.\n",
        "!gdown 16FSh45xge5RybunR6-4tER8Vi_pXXx4a # download the entire dataset"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "IM--HAJWVJ0D",
        "outputId": "ffba87e2-be07-40c0-a2a3-608e686d5033"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Downloading...\n",
            "From: https://drive.google.com/uc?id=16FSh45xge5RybunR6-4tER8Vi_pXXx4a\n",
            "To: /content/German_French_UK_China_MENA_040420.csv\n",
            "\r  0% 0.00/2.88M [00:00<?, ?B/s]\r100% 2.88M/2.88M [00:00<00:00, 106MB/s]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "label2num = {\"Monitoring environmental impact\": 1,\n",
        "              \"Preventing pollution\": 2,\n",
        "              \"Strengthening ecosystems\": 3,\n",
        "              \"Reducing use\": 4,\n",
        "              \"Reusing\": 5,\n",
        "              \"Recycling\": 6,\n",
        "              \"Repurposing\":7,\n",
        "              \"Encouraging and supporting others\": 8,\n",
        "              \"Educating and training for sustainability\": 9,\n",
        "              \"Creating sustainable products and processes\": 10,\n",
        "              \"Embracing innovation for sustainability\": 11,\n",
        "              \"Changing how work is done\": 12,\n",
        "              \"Choosing responsible alternatives\": 13,\n",
        "              \"Instituting programs and policies\": 14,\n",
        "              \"Others\":15}\n",
        "\n",
        "for key, val in label2num.items():\n",
        "  label2num[key] = val-1\n",
        "\n",
        "def datasetclearning(df):\n",
        "  # Select two relevant columns from the original dataset\n",
        "  df = df[['Major_Industry','description_behavior', 'consequences','Specific_cat']]\n",
        "  df['description_behavior'] = df['description_behavior']+' '+df['consequences']\n",
        "  # Collapse the data for the NLP paper\n",
        "  other = [ \"Putting environmental interests first\", \"Lobbying and activism\"]\n",
        "  # Collapse \"Putting environmental interest first\" and \"Lobbying and Activism\" into \"the Other\" category\n",
        "  replacement = df['Specific_cat'].where(df['Specific_cat'].isin(other) == False, \"Others\")\n",
        "  df = pd.concat([df['description_behavior'], replacement], axis =1)\n",
        "  num2label = {y:x for x,y in label2num.items()}\n",
        "  # Convert text label to number numeric\n",
        "  df_label = df['Specific_cat'].map(label2num)\n",
        "  df_label.name = 'label'\n",
        "  df = pd.concat([df,df_label], axis = 1)\n",
        "  # Retain only the behaviors and numberic label\n",
        "  df = df[['description_behavior','label']]\n",
        "  # Rename the columns of output dataframe\n",
        "  df.columns = ['text', 'label']\n",
        "  df['text'] = df['text'].str.replace(\"\\n \", \" \")\n",
        "  # Return the dataframe\n",
        "  return [df, num2label]"
      ],
      "metadata": {
        "id": "zJxbuq8tPRZw"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import openai\n",
        "import pandas as pd\n",
        "\n",
        "from openai import OpenAI\n",
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "# Load the raw CSV that the gdown cell saved under /content.\n",
        "df = pd.read_csv(\"/content/German_French_UK_China_MENA_040420.csv\")\n",
        "# `df` is rebound to the cleaned frame returned by datasetclearning;\n",
        "# num2label maps numeric codes back to label names.\n",
        "df, num2label = datasetclearning(df)\n",
        "\n",
        "def test_train_split(df, seed):\n",
        "  X_train, X_test, Y_train, Y_test, indices_train, indices_test = train_test_split(df['text'], df['label'], df.index, test_size=2449, random_state = seed, stratify = df['label'])\n",
        "  return [list(X_train), list(X_test), list(Y_train), list(Y_test)]"
      ],
      "metadata": {
        "id": "hTL4dINeVkI3",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "6b889816-9155-4404-8a45-9cbc89a20502"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-3-2814818bdd97>:23: SettingWithCopyWarning: \n",
            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
            "Try using .loc[row_indexer,col_indexer] = value instead\n",
            "\n",
            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
            "  df['description_behavior'] = df['description_behavior']+' '+df['consequences']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def data_prep(df, seed):\n",
        "  # 1. Test-train split (Test set has 2449 cases across simulation conditions)\n",
        "  X_train, X_test, Y_train, Y_test = test_train_split(df, seed)\n",
        "  return X_train, X_test, Y_train, Y_test"
      ],
      "metadata": {
        "id": "JmbJrfELfs9G"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Class list that opens every classification prompt.\n",
        "prompting_0 = f\"We have these following classes: [{', '.join(label2num.keys())}]\"\n",
        "\n",
        "\n",
        "# No key is stored in the notebook: with no api_key argument the client reads\n",
        "# the OPENAI_API_KEY environment variable, so set that before running this cell.\n",
        "client = OpenAI()\n",
        "\n",
        "def ask_p0(content):\n",
        "  prompt = prompting_0 + f\"\"\"\\nNow please classify the following content into one of the above classes:\n",
        "\n",
        "  ###\n",
        "  {content}\n",
        "  ###\n",
        "\n",
        "  Return the predicted class. Do not include any other kind of output.\"\"\"\n",
        "  completion = client.chat.completions.create(\n",
        "  model=\"gpt-4o\",\n",
        "  messages=[\n",
        "    {\"role\": \"user\", \"content\": prompt,}])\n",
        "  m = completion.choices[0].message.content\n",
        "  return m"
      ],
      "metadata": {
        "id": "HC_Quhw7Vjrj"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import classification_report\n",
        "from collections import defaultdict\n",
        "\n",
        "# Weighted-average metrics, one entry per seed.\n",
        "p0_precisions = []\n",
        "p0_recalls = []\n",
        "p0_f1s = []\n",
        "\n",
        "# Cap on how often one item is sent to the model. Without a cap, an item the\n",
        "# model never answers with an exact class name would be re-sent forever.\n",
        "MAX_ATTEMPTS = 10\n",
        "\n",
        "for seed in range(1, 6):\n",
        "  print(f\"current seed: {seed}\")\n",
        "  X_train, X_test, Y_train, Y_test = data_prep(df, seed)\n",
        "\n",
        "  predictions_p0 = []\n",
        "  for i, text in enumerate(X_test):\n",
        "    if i % 100 == 0:\n",
        "      print(f\"current index: {i}\")\n",
        "    predicted_class = ask_p0(text)\n",
        "    number_failures = 1\n",
        "    # Only an exact class name is accepted; anything else is logged and re-asked.\n",
        "    while predicted_class not in label2num:\n",
        "      print(predicted_class, f\"index = {i}, f = {number_failures}, try again\")\n",
        "      if number_failures >= MAX_ATTEMPTS:\n",
        "        raise RuntimeError(\n",
        "            f\"no valid class for test item {i} after {MAX_ATTEMPTS} attempts; \"\n",
        "            f\"last reply: {predicted_class!r}\")\n",
        "      number_failures += 1\n",
        "      predicted_class = ask_p0(text)\n",
        "    predictions_p0.append(label2num[predicted_class])\n",
        "\n",
        "  metrics_p0 = classification_report(Y_test, predictions_p0, output_dict = True)\n",
        "  weighted_avg = metrics_p0['weighted avg']\n",
        "  p0_precisions.append(weighted_avg.get('precision'))\n",
        "  p0_recalls.append(weighted_avg.get('recall'))\n",
        "  p0_f1s.append(weighted_avg.get('f1-score'))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "kvfCib85ZIad",
        "outputId": "0c252b44-bd3b-48e9-9b2f-2874d40c08a7"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "current seed: 1\n",
            "current index: 0\n",
            "current index: 100\n",
            "current index: 200\n",
            "current index: 300\n",
            "current index: 400\n",
            "current index: 500\n",
            "current index: 600\n",
            "current index: 700\n",
            "current index: 800\n",
            "current index: 900\n",
            "current index: 1000\n",
            "Using Cradle to Cradle concept materials in building is primarily about creating sustainable products and processes. index = 1091, f = 1, try again\n",
            "current index: 1100\n",
            "current index: 1200\n",
            "Participating in multimobil-Tag initiative reduce carbon footprint\n",
            "Choosing responsible alternatives index = 1204, f = 1, try again\n",
            "current index: 1300\n",
            "Reducing use, Reusing, Recycling index = 1372, f = 1, try again\n",
            "current index: 1400\n",
            "current index: 1500\n",
            "current index: 1600\n",
            "current index: 1700\n",
            "current index: 1800\n",
            "current index: 1900\n",
            "current index: 2000\n",
            "Participating in the Low Carbon Technology Partnership initiative (LCTPi) to reduce carbon emissions falls under the class:\n",
            "Preventing pollution index = 2029, f = 1, try again\n",
            "current index: 2100\n",
            "[Monitoring environmental impact] index = 2105, f = 1, try again\n",
            "current index: 2200\n",
            "current index: 2300\n",
            "Improving processes such as cooling towers aligns closely with `Creating sustainable products and processes`. It involves making a specific system (cooling towers) more efficient and sustainable through process enhancement. index = 2334, f = 1, try again\n",
            "current index: 2400\n",
            "current seed: 2\n",
            "current index: 0\n",
            "current index: 100\n",
            "Using internet to smartly control lamp lights Reduce energy usage: Embracing innovation for sustainability index = 180, f = 1, try again\n",
            "current index: 200\n",
            "current index: 300\n",
            "current index: 400\n",
            "[Monitoring environmental impact] index = 486, f = 1, try again\n",
            "current index: 500\n",
            "current index: 600\n",
            "Investing in sustainability index = 658, f = 1, try again\n",
            "current index: 700\n",
            "current index: 800\n",
            "current index: 900\n",
            "current index: 1000\n",
            "current index: 1100\n",
            "current index: 1200\n",
            "current index: 1300\n",
            "current index: 1400\n",
            "current index: 1500\n",
            "Introducing programs and policies index = 1536, f = 1, try again\n",
            "current index: 1600\n",
            "#### Reducing use index = 1654, f = 1, try again\n",
            "current index: 1700\n",
            "Supporting nature conservation projects initiated by the Deutsche Umwelthilfe (DUH) conserve nature can be classified under the following class:\n",
            "\n",
            "[Encouraging and supporting others] index = 1788, f = 1, try again\n",
            "Supporting and encouraging others index = 1788, f = 2, try again\n",
            "current index: 1800\n",
            "current index: 1900\n",
            "current index: 2000\n",
            "current index: 2100\n",
            "Reducing use, Reusing, Recycling index = 2114, f = 1, try again\n",
            "current index: 2200\n",
            "current index: 2300\n",
            "current index: 2400\n",
            "current seed: 3\n",
            "current index: 0\n",
            "current index: 100\n",
            "current index: 200\n",
            "Implementing a new air purification process through fine paint droplets to reduce carbon footprint falls under the class: Embracing innovation for sustainability. index = 210, f = 1, try again\n",
            "current index: 300\n",
            "current index: 400\n",
            "current index: 500\n",
            "Finance the “green” projects selected by Natixis Energéco in the areas of design, construction and/or maintenance of renewable energy production sites increase investment in renewable energy \n",
            "\n",
            "\n",
            "Creating sustainable products and processes index = 582, f = 1, try again\n",
            "current index: 600\n",
            "current index: 700\n",
            "current index: 800\n",
            "current index: 900\n",
            "current index: 1000\n",
            "current index: 1100\n",
            "current index: 1200\n",
            "current index: 1300\n",
            "current index: 1400\n",
            "Increasing energy use index = 1486, f = 1, try again\n",
            "current index: 1500\n",
            "current index: 1600\n",
            "current index: 1700\n",
            "current index: 1800\n",
            "current index: 1900\n",
            "current index: 2000\n",
            "current index: 2100\n",
            "current index: 2200\n",
            "current index: 2300\n",
            "current index: 2400\n",
            "current seed: 4\n",
            "current index: 0\n",
            "Protecting forests index = 47, f = 1, try again\n",
            "Improving our production process for isononanol (INA) increase production efficiency\n",
            "\n",
            "Class: Creating sustainable products and processes index = 91, f = 1, try again\n",
            "current index: 100\n",
            "current index: 200\n",
            "current index: 300\n",
            "current index: 400\n",
            "Increasing the supply of sustainable oil available on the market by a volume equal to Henkel’s demand in 2020 increase sustainable products\n",
            "\n",
            "Creating sustainable products and processes index = 435, f = 1, try again\n",
            "current index: 500\n",
            "current index: 600\n",
            "current index: 700\n",
            "current index: 800\n",
            "current index: 900\n",
            "current index: 1000\n",
            "[Reducing use] index = 1085, f = 1, try again\n",
            "current index: 1100\n",
            "current index: 1200\n",
            "current index: 1300\n",
            "current index: 1400\n",
            "current index: 1500\n",
            "current index: 1600\n",
            "Reusing, Recycling, Repurposing index = 1664, f = 1, try again\n",
            "current index: 1700\n",
            "current index: 1800\n",
            "current index: 1900\n",
            "current index: 2000\n",
            "Reducing use, Reusing, Recycling index = 2053, f = 1, try again\n",
            "current index: 2100\n",
            "current index: 2200\n",
            "Reducing use, Reusing, Recycling index = 2267, f = 1, try again\n",
            "current index: 2300\n",
            "current index: 2400\n",
            "current seed: 5\n",
            "current index: 0\n",
            "current index: 100\n",
            "current index: 200\n",
            "current index: 300\n",
            "current index: 400\n",
            "current index: 500\n",
            "current index: 600\n",
            "current index: 700\n",
            "Implementing green optimization services preserve the environment is classified under **Embracing innovation for sustainability** index = 711, f = 1, try again\n",
            "Strengthening ecosystems  index = 747, f = 1, try again\n",
            "current index: 800\n",
            "current index: 900\n",
            "current index: 1000\n",
            "current index: 1100\n",
            "current index: 1200\n",
            "current index: 1300\n",
            "current index: 1400\n",
            "current index: 1500\n",
            "current index: 1600\n",
            "current index: 1700\n",
            "current index: 1800\n",
            "Improving post-process treatment systems can be classified under: Creating sustainable products and processes index = 1866, f = 1, try again\n",
            "current index: 1900\n",
            "current index: 2000\n",
            "Implementing programs and policies index = 2034, f = 1, try again\n",
            "current index: 2100\n",
            "Outsourced waste handling and recycling reduced waste: Recycling index = 2104, f = 1, try again\n",
            "current index: 2200\n",
            "Reducing use, Reusing, Recycling index = 2230, f = 1, try again\n",
            "Reusing, Recycling index = 2230, f = 2, try again\n",
            "current index: 2300\n",
            "Joining the \"Zero Discharge of Hazardous Chemicals\" coalition prevents harm to the environment falls under the class:\n",
            "\n",
            "Preventing pollution index = 2393, f = 1, try again\n",
            "current index: 2400\n",
            "Participating in events related to carbon markets and promoting achievements in low-carbon transformation, energy-saving, and emission reduction can be categorized under:\n",
            "\n",
            "[Encouraging and supporting others] index = 2403, f = 1, try again\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "\n",
        "# Per-seed values first, then the mean of each metric across seeds.\n",
        "print(p0_precisions, p0_recalls, p0_f1s)\n",
        "\n",
        "for caption, per_seed_scores in (\n",
        "    (\"mean precision p0\", p0_precisions),\n",
        "    (\"mean recall p0\", p0_recalls),\n",
        "    (\"mean f1 p0\", p0_f1s),\n",
        "):\n",
        "  print(caption, np.mean(per_seed_scores))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "d0Rwp6iPfbGo",
        "outputId": "64756b05-b9dd-4cc5-aced-43b5b271566a"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[0.4726746031746032, 0.5456355866355866, 0.5587649695505733, 0.45763636363636356, 0.5562085137085137] [0.47, 0.49, 0.52, 0.46, 0.47] [0.4396023482619592, 0.4605843396369712, 0.501103364282776, 0.41281635907951697, 0.46645782005204756]\n",
            "mean precision p0 0.5181840073411281\n",
            "mean recall p0 0.48200000000000004\n",
            "mean f1 p0 0.45611284626265425\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Display the class-list prefix that ask_p0 puts at the start of every prompt.\n",
        "prompting_0"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 52
        },
        "id": "QQV1xb6_PTXR",
        "outputId": "7139bfda-6534-4fe9-bf5d-6d4535af9fec"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'We have these following classes: [Monitoring environmental impact, Preventing pollution, Strengthening ecosystems, Reducing use, Reusing, Recycling, Repurposing, Encouraging and supporting others, Educating and training for sustainability, Creating sustainable products and processes, Embracing innovation for sustainability, Changing how work is done, Choosing responsible alternatives, Instituting programs and policies, Others]'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 116
        }
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}